Skip to content

Commit cb9d7b3

Browse files
committed
Add new llamafile-tokenize command
1 parent 208f5b4 commit cb9d7b3

File tree

3 files changed

+135
-0
lines changed

3 files changed

+135
-0
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ install: llamafile/zipalign.1 \
2828
llama.cpp/perplexity/perplexity.1 \
2929
llama.cpp/llava/llava-quantize.1 \
3030
o/$(MODE)/llamafile/zipalign \
31+
o/$(MODE)/llamafile/tokenize \
3132
o/$(MODE)/llama.cpp/main/main \
3233
o/$(MODE)/llama.cpp/imatrix/imatrix \
3334
o/$(MODE)/llama.cpp/quantize/quantize \
3435
o/$(MODE)/llama.cpp/perplexity/perplexity \
3536
o/$(MODE)/llama.cpp/llava/llava-quantize
3637
mkdir -p $(PREFIX)/bin
3738
$(INSTALL) o/$(MODE)/llamafile/zipalign $(PREFIX)/bin/zipalign
39+
$(INSTALL) o/$(MODE)/llamafile/tokenize $(PREFIX)/bin/llamafile-tokenize
3840
$(INSTALL) o/$(MODE)/llama.cpp/main/main $(PREFIX)/bin/llamafile
3941
$(INSTALL) o/$(MODE)/llama.cpp/imatrix/imatrix $(PREFIX)/bin/llamafile-imatrix
4042
$(INSTALL) o/$(MODE)/llama.cpp/quantize/quantize $(PREFIX)/bin/llamafile-quantize

llamafile/BUILD.mk

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ o/$(MODE)/llamafile/simple: \
3232
o/$(MODE)/llama.cpp/llava/llava.a \
3333
o/$(MODE)/llama.cpp/llama.cpp.a
3434

35+
# Link rule for the standalone tokenize tool: built from tokenize.o plus the
# llava and llama.cpp static archives. The top-level Makefile installs the
# resulting binary as $(PREFIX)/bin/llamafile-tokenize.
o/$(MODE)/llamafile/tokenize: \
36+
o/$(MODE)/llamafile/tokenize.o \
37+
o/$(MODE)/llama.cpp/llava/llava.a \
38+
o/$(MODE)/llama.cpp/llama.cpp.a
39+
3540
.PHONY: o/$(MODE)/llamafile
3641
o/$(MODE)/llamafile: \
3742
$(LLAMAFILE_OBJS) \

llamafile/tokenize.cpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
2+
// vi: set et ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi
3+
//
4+
// Copyright 2024 Mozilla Foundation
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
18+
#include <cerrno>
19+
#include <cmath>
20+
#include <cosmo.h>
21+
#include <cstdio>
22+
#include <cstring>
23+
#include <vector>
24+
25+
#include "llama.cpp/common.h"
26+
#include "llama.cpp/llama.h"
27+
#include "llamafile/llamafile.h"
28+
29+
int main(int argc, char **argv) {
30+
31+
if (llamafile_has(argv, "--version")) {
32+
puts("llamafile-tokenize v" LLAMAFILE_VERSION_STRING);
33+
return 0;
34+
}
35+
36+
llamafile_check_cpu();
37+
log_disable();
38+
39+
gpt_params params;
40+
params.n_ctx = 0;
41+
42+
if (!gpt_params_parse(argc, argv, params))
43+
return 1;
44+
45+
llama_model_params model_params = llama_model_default_params();
46+
llama_model *model = llama_load_model_from_file(params.model.c_str(), model_params);
47+
if (model == NULL)
48+
return 3;
49+
50+
llama_context_params ctx_params = llama_context_params_from_gpt_params(params);
51+
llama_context *ctx = llama_new_context_with_model(model, ctx_params);
52+
if (ctx == NULL)
53+
return 4;
54+
55+
bool should_read_stdin = params.prompt.empty();
56+
57+
for (;;) {
58+
ssize_t n;
59+
char buf[4097];
60+
const char *input;
61+
if (should_read_stdin) {
62+
n = read(0, buf, 4096);
63+
if (n == -1) {
64+
fprintf(stderr, "/dev/stdin: %s\n", strerror(errno));
65+
exit(1);
66+
}
67+
if (!n)
68+
break;
69+
buf[n] = 0;
70+
input = buf;
71+
} else {
72+
input = params.prompt.c_str();
73+
}
74+
75+
std::vector<llama_token> toks = ::llama_tokenize(ctx, input, false);
76+
for (llama_token tok : toks) {
77+
std::string str = llama_token_to_piece(ctx, tok, true);
78+
const char *s = str.c_str();
79+
for (int i = 0; s[i]; ++i) {
80+
int c = s[i] & 255;
81+
switch (c) {
82+
case '\\':
83+
fputc('\\', stdout);
84+
fputc('\\', stdout);
85+
break;
86+
case '\a':
87+
fputc('\\', stdout);
88+
fputc('b', stdout);
89+
break;
90+
case '\e':
91+
fputc('\\', stdout);
92+
fputc('e', stdout);
93+
break;
94+
case '\v':
95+
fputc('\\', stdout);
96+
fputc('v', stdout);
97+
break;
98+
case '\t':
99+
fputc('\\', stdout);
100+
fputc('t', stdout);
101+
break;
102+
case '\r':
103+
fputc('\\', stdout);
104+
fputc('r', stdout);
105+
break;
106+
case '\n':
107+
fputc('\\', stdout);
108+
fputc('n', stdout);
109+
break;
110+
default:
111+
if (isascii(c) && iscntrl(c)) {
112+
fputc('\\', stdout);
113+
fputc('0' + ((c & 0300) >> 6), stdout);
114+
fputc('0' + ((c & 0070) >> 3), stdout);
115+
fputc('0' + ((c & 0007) >> 0), stdout);
116+
} else {
117+
fputc(c, stdout);
118+
}
119+
break;
120+
}
121+
}
122+
fputc('\n', stdout);
123+
}
124+
}
125+
126+
llama_free(ctx);
127+
llama_free_model(model);
128+
}

0 commit comments

Comments
 (0)